In this unit we discuss several approximate Bayesian methods that have been presented in the slides of unit D.2.
y <- c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0) # Failure
temp <- c(53, 57, 58, 63, 66, 67, 67, 67, 68, 69, 70, 70, 70, 70, 72, 73, 75, 75, 76, 76, 78, 79, 81) # Temperature (Farenheit)
X <- cbind(1, temp) # Design matrixWe will employ a relatively vague prior centered at \(0\), namely \(\beta \sim N(0, 100 I_p)\).
B <- diag(100, ncol(X)) # Prior covariance matrix
b <- rep(0, ncol(X)) # Prior meanlibrary(BayesLogit)
logit_Gibbs <- function(R, burn_in, y, X, B, b) {
p <- ncol(X)
n <- nrow(X)
out <- matrix(0, R, p) # Initialize an empty matrix to store the values
P <- solve(B) # Prior precision matrix
Pb <- P %*% b # Term appearing in the Gibbs sampling
Xy <- crossprod(X, y - 1 / 2)
# Initialization
beta <- rep(0, p)
# Iterative procedure
for (r in 1:(R + burn_in)) {
# Sampling the Pólya-gamma latent variables
eta <- c(X %*% beta)
omega <- rpg.devroye(num = n, h = 1, z = eta)
# Sampling beta
eig <- eigen(crossprod(X * sqrt(omega)) + P, symmetric = TRUE)
Sigma <- crossprod(t(eig$vectors) / sqrt(eig$values))
mu <- Sigma %*% (Xy + Pb)
A1 <- t(eig$vectors) / sqrt(eig$values)
beta <- mu + c(matrix(rnorm(1 * p), 1, p) %*% A1)
# Store the values after the burn-in period
if (r > burn_in) {
out[r - burn_in, ] <- beta
}
}
out
}set.seed(123)
# Running the MCMC
fit_MCMC <- logit_Gibbs(R = 50000, burn_in = 5000, y, X, B, b) # MCMC (gold standard)logit_Laplace <- function(y, X, B, b, tol = 1e-16, maxiter = 10000) {
P <- solve(B) # Prior precision matrix
Pb <- P %*% b # Term appearing in the Gibbs sampling
logpost <- numeric(maxiter)
Xy <- crossprod(X, y - 0.5)
# Initialization
beta <- solve(crossprod(X / 4, X) + P, Xy + Pb)
eta <- c(X %*% beta)
w <- tanh(eta / 2) / (2 * eta)
w[is.nan(w)] <- 0.25
# First value of the likelihood
logpost[1] <- sum(y * eta - log(1 + exp(eta))) - 0.5 * t(beta) %*% P %*% beta
# Iterative procedure
for (t in 2:maxiter) {
beta <- solve(qr(crossprod(X * w, X) + P), Xy + Pb)
eta <- c(X %*% beta)
w <- tanh(eta / 2) / (2 * eta)
w[is.nan(w)] <- 0.25
logpost[t] <- sum(y * eta - log(1 + exp(eta))) - 0.5 * t(beta) %*% P %*% beta
if (logpost[t] - logpost[t - 1] < tol) {
prob <- plogis(eta)
return(list(
mu = c(beta), Sigma = solve(crossprod(X * prob * (1 - prob), X) + P),
Convergence = cbind(Iteration = (1:t) - 1, logpost = logpost[1:t])
))
}
}
stop("The algorithm has not reached convergence")
}library(tictoc)
library(ggplot2)
tic()
fit_Laplace <- logit_Laplace(y, X, B, b)
toc()
0.035 sec elapsed
par(mfrow = c(1, 2))
plot(density(fit_MCMC[, 1]), xlab = expression(beta[1]), lty = "dotted", main = "")
curve(dnorm(x, fit_Laplace$mu[1], sqrt(fit_Laplace$Sigma[1, 1])), add = T)
plot(density(fit_MCMC[, 2]), xlab = expression(beta[2]), lty = "dotted", main = "")
curve(dnorm(x, fit_Laplace$mu[2], sqrt(fit_Laplace$Sigma[2, 2])), add = T)logit_CAVI <- function(y, X, B, b, tol = 1e-16, maxiter = 10000) {
# Compute the log-determinant of a matrix
ldet <- function(X) {
if (!is.matrix(X)) {
return(log(X))
}
determinant(X, logarithm = TRUE)$modulus
}
lowerbound <- numeric(maxiter)
p <- ncol(X)
n <- nrow(X)
P <- solve(B)
Pb <- c(P %*% b)
Pdet <- ldet(P)
# Initialization for omega equal to 0.25
P_vb <- crossprod(X * rep(1 / 4, n), X) + P
Sigma_vb <- solve(P_vb)
mu_vb <- Sigma_vb %*% (crossprod(X, y - 0.5) + Pb)
eta <- c(X %*% mu_vb)
xi <- sqrt(eta^2 + rowSums(X %*% Sigma_vb * X))
omega <- tanh(xi / 2) / (2 * xi)
omega[is.nan(omega)] <- 0.25
lowerbound[1] <- 0.5 * p + 0.5 * ldet(Sigma_vb) + 0.5 * Pdet - 0.5 * t(mu_vb - b) %*% P %*% (mu_vb - b) + sum((y - 0.5) * eta + log(plogis(xi)) - 0.5 * xi) - 0.5 * sum(diag(P %*% Sigma_vb))
# Iterative procedure
for (t in 2:maxiter) {
P_vb <- crossprod(X * omega, X) + P
Sigma_vb <- solve(P_vb)
mu_vb <- Sigma_vb %*% (crossprod(X, y - 0.5) + Pb)
# Update of xi
eta <- c(X %*% mu_vb)
xi <- sqrt(eta^2 + rowSums(X %*% Sigma_vb * X))
omega <- tanh(xi / 2) / (2 * xi)
omega[is.nan(omega)] <- 0.25
lowerbound[t] <- 0.5 * p + 0.5 * ldet(Sigma_vb) + 0.5 * Pdet - 0.5 * t(mu_vb - b) %*% P %*% (mu_vb - b) + sum((y - 0.5) * eta + log(plogis(xi)) - 0.5 * xi) - 0.5 * sum(diag(P %*% Sigma_vb))
if (abs(lowerbound[t] - lowerbound[t - 1]) < tol) {
return(list(
mu = c(mu_vb), Sigma = matrix(Sigma_vb, p, p),
Convergence = cbind(
Iteration = (1:t) - 1,
Lowerbound = lowerbound[1:t]
), xi = xi
))
}
}
stop("The algorithm has not reached convergence")
}library(tictoc)
tic()
fit_CAVI <- logit_CAVI(y, X, B, b)
toc()
0.059 sec elapsed
par(mfrow = c(1, 2))
plot(density(fit_MCMC[, 1]), xlab = expression(beta[1]), lty = "dotted", main = "", ylim = c(0,0.09))
curve(dnorm(x, fit_CAVI$mu[1], sqrt(fit_CAVI$Sigma[1, 1])), add = T)
plot(density(fit_MCMC[, 2]), xlab = expression(beta[2]), lty = "dotted", main = "", ylim = c(0,6))
curve(dnorm(x, fit_CAVI$mu[2], sqrt(fit_CAVI$Sigma[2, 2])), add = T)library(EPGLM)
tic()
fit_EP <- EPlogit(X, y, s = B[1, 1])
toc()
0.002 sec elapsed
par(mfrow = c(1, 2))
plot(density(fit_MCMC[, 1]), xlab = expression(beta[1]), lty = "dotted", main = "", ylim = c(0,0.09))
curve(dnorm(x, fit_EP$m[1], sqrt(fit_EP$V[1, 1])), add = T)
plot(density(fit_MCMC[, 2]), xlab = expression(beta[2]), lty = "dotted", main = "", ylim = c(0,6))
curve(dnorm(x, fit_EP$m[2], sqrt(fit_EP$V[2, 2])), add = T)Means <- data.frame(
MCMC = colMeans(fit_MCMC),
Laplace = fit_Laplace$mu,
VB = fit_CAVI$mu,
EP = fit_EP$m
)
knitr::kable(Means, digits = 4)| MCMC | Laplace | VB | EP |
|---|---|---|---|
| 11.8050 | 10.5527 | 11.0425 | 11.8669 |
| -0.1858 | -0.1665 | -0.1740 | -0.1869 |
mean_mcmc <- colMeans(fit_MCMC)
Errors <- data.frame(
Laplace = (fit_Laplace$mu - mean_mcmc),
VB = (fit_CAVI$mu - mean_mcmc),
EP = (fit_EP$m - mean_mcmc)
)
apply(Errors, 2, function(x) mean(abs(x)))
Laplace VB EP
0.63581238 0.38720446 0.03149474
knitr::kable(Errors, digits = 4)| Laplace | VB | EP |
|---|---|---|
| -1.2523 | -0.7626 | 0.0619 |
| 0.0193 | 0.0118 | -0.0011 |
Errors_perc <- data.frame(
Laplace = (fit_Laplace$mu - mean_mcmc) / mean_mcmc * 100,
VB = (fit_CAVI$mu - mean_mcmc) / mean_mcmc * 100,
EP = (fit_EP$m - mean_mcmc) / mean_mcmc * 100
)
apply(Errors_perc, 2, function(x) mean(abs(x)))
Laplace VB EP
10.4966927 6.4109456 0.5550979
knitr::kable(Errors_perc, digits = 4)| Laplace | VB | EP |
|---|---|---|
| -10.6084 | -6.4598 | 0.5244 |
| -10.3850 | -6.3620 | 0.5858 |
Sd <- data.frame(
MCMC = sqrt(diag(var(fit_MCMC))),
Laplace = sqrt(diag(fit_Laplace$Sigma)),
VB = sqrt(diag(fit_CAVI$Sigma)),
EP = sqrt(diag(fit_EP$V))
)
knitr::kable(Sd, digits = 4, row.names = F)| MCMC | Laplace | VB | EP |
|---|---|---|---|
| 5.3675 | 5.0465 | 4.3489 | 5.3215 |
| 0.0789 | 0.0741 | 0.0628 | 0.0783 |